{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from __future__ import absolute_import\n",
    "from __future__ import division\n",
    "from __future__ import print_function\n",
    "from final_features_process import *\n",
    "from DF_Model import *\n",
    "import pickle\n",
    "import os\n",
    "import numpy as np\n",
    "import time\n",
    "from sklearn.utils import shuffle\n",
    "from keras import backend as K\n",
    "from keras.utils import np_utils\n",
    "from keras.optimizers import Adamax\n",
    "import tensorflow as tf\n",
    "import keras.backend.tensorflow_backend as ktf\n",
    "from os import path\n",
    "import random\n",
    "\n",
    "# Fix the seed of Python's built-in `random` module.\n",
    "# NOTE(review): only `random` is seeded in this cell; the NumPy and TensorFlow\n",
    "# generators are not, so runs may still differ if the pipeline draws from them.\n",
    "random.seed(583004949)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Dataset names this notebook knows about. Later cells pick one entry by index\n",
    "# and compare against these entries to choose the number of classes.\n",
    "datasets = ['Undefended', 'WTF-PAD', 'W-T-Simulated', 'W-T-Real', 'Onion-Sites']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Dataset to run: choose an entry of `datasets` by index (0 = 'Undefended').\n",
    "# The chosen name is also used as the sub-directory name under save_data/.\n",
    "dataset = datasets[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Provide Data Path\n",
    "# data_root: directory with the raw data; it is only passed to final_process,\n",
    "#            i.e. it matters only when the raw-data processing option is enabled.\n",
    "# NOTE: this is a machine-specific absolute path -- change it for your environment.\n",
    "data_root = '/home/saidur/data/mohsen_backup/raw-data/raw-data-50-1000/'\n",
    "# save_path: per-dataset directory for the processed .pkl files. The trailing '/'\n",
    "#            is required because file names are appended by plain concatenation.\n",
    "save_path = os.getcwd() + '/' + 'save_data/' + str(dataset) + '/'\n",
    "\n",
    "# Create the directory only when it is missing. An explicit check (rather than\n",
    "# catching every exception from os.stat) lets real problems such as permission\n",
    "# errors surface and never swallows KeyboardInterrupt.\n",
    "if not os.path.isdir(save_path):\n",
    "    os.makedirs(save_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Check whether the processed files for training the model already exist in\n",
    "# save_path (one '<name>.pkl' per entry below).\n",
    "model_files = ['X_tr', 'Y_tr', 'X_vl', 'Y_vl', 'X_te', 'Y_te']\n",
    "missing_files = [name for name in model_files\n",
    "                 if not path.exists(save_path + name + '.pkl')]\n",
    "\n",
    "if not missing_files:\n",
    "    # Every expected file is present: load the processed data directly.\n",
    "    X_train, y_train, X_valid, y_valid, X_test, y_test = final_data_load(save_path)\n",
    "else:\n",
    "    # Tell the user exactly what is missing before the fallback is attempted.\n",
    "    print('Processed files missing from', save_path, ':', missing_files)\n",
    "\n",
    "    # Option 1: Processing raw data. Takes a long time. Uncomment if you choose Option 1.\n",
    "    # Download the raw data from the google drive and put the data into data_root.\n",
    "    # X_train, y_train, X_valid, y_valid, X_test, y_test = final_process(dataset, data_root, save_path)\n",
    "\n",
    "    # Option 2: Download the processed data from google drive and put those into the save_path\n",
    "    X_train, y_train, X_valid, y_valid, X_test, y_test = final_data_load(save_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "K.set_image_dim_ordering(\"tf\")\n",
    "\n",
    "# Number of output classes for each dataset. Any dataset not listed here\n",
    "# ('Onion-Sites' in the current `datasets` list) falls back to 538.\n",
    "num_classes_by_dataset = {\n",
    "    datasets[0]: 95,\n",
    "    datasets[1]: 95,\n",
    "    datasets[2]: 100,\n",
    "    datasets[3]: 100,\n",
    "}\n",
    "num_classes = num_classes_by_dataset.get(dataset, 538)\n",
    "\n",
    "# The model inputs are built under NEW names and the loaded arrays are left\n",
    "# untouched, so this cell can be re-run safely: re-running never stacks a second\n",
    "# trailing axis on X and never one-hot encodes labels that are already one-hot.\n",
    "\n",
    "# Convert features to float32 and append a trailing axis of size 1, as the DF CNN\n",
    "# (Tensorflow) expects a [Length x 1] x n input.\n",
    "# Assumes each X array is 2-D (samples, length) as loaded -- TODO confirm.\n",
    "X_train_in = X_train.astype('float32')[:, :, np.newaxis]\n",
    "X_valid_in = X_valid.astype('float32')[:, :, np.newaxis]\n",
    "X_test_in = X_test.astype('float32')[:, :, np.newaxis]\n",
    "\n",
    "print(X_train_in.shape[0], 'train samples')\n",
    "print(X_valid_in.shape[0], 'validation samples')\n",
    "print(X_test_in.shape[0], 'test samples')\n",
    "\n",
    "# Convert class vectors (cast to float32 first) to categorical class matrices.\n",
    "y_train_cat = np_utils.to_categorical(y_train.astype('float32'), num_classes)\n",
    "y_valid_cat = np_utils.to_categorical(y_valid.astype('float32'), num_classes)\n",
    "y_test_cat = np_utils.to_categorical(y_test.astype('float32'), num_classes)\n",
    "\n",
    "\n",
    "run_trial = 1 # change run_trial > 1 to get a standard deviation of the accuracy.\n",
    "seq_length = 160 # 8 timing features x 20 bins = 160 features values.\n",
    "num_epochs = 100 # 100 epochs for experiments with timing_features and onion_sites\n",
    "VERBOSE = 0\n",
    "\n",
    "# One df_accuracy result per trial, in trial order.\n",
    "df_res = []\n",
    "for j in range(run_trial):\n",
    "    trial_result = df_accuracy(num_classes, num_epochs, seq_length, VERBOSE,\n",
    "                               X_train_in, y_train_cat,\n",
    "                               X_valid_in, y_valid_cat,\n",
    "                               X_test_in, y_test_cat)\n",
    "    df_res.append(trial_result)\n",
    "\n",
    "# Summary statistics are only printed when more than one trial was requested.\n",
    "if run_trial != 1:\n",
    "    print('Mean Acc: ', np.mean(df_res))\n",
    "    print('STD of Mean: ', np.std(df_res))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
